library(tidyverse)
library(ggplot2)
library(tidyverse)
library(kableExtra)
library(reshape2)
library(dplyr)
library(network)
library(ggraph)
library(GGally)
library(sna)
library(ggplot2)
library(ggnetwork)
library(ggraph)
library(tidygraph)
library(ggsci)
library(data.table)
library(reshape2)
library(tidymodels)
library(furrr)
library(R.matlab)
library(BiocParallel)
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
library(gtools)
library(graphlayouts)

net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/results/inh/"
expression_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/input/"
resources_dir = params$resources_dir

# BN folders 
data_prefix = "inh_m06"
bn_results_dir = "/pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/inh_modules/inh_m06/"
BN_run_dir = "/pastel/resources/bayesian_networks/CINDERellA/"
BN_output_dir = paste0(bn_results_dir,data_prefix,"_res")
BN_run_data = paste0(bn_results_dir,data_prefix,"_exp.txt")
cat("Preparing phenotypes...")
## Preparing phenotypes...
### phenotype
phenotype = readRDS(paste0(resources_dir, "phenotypes/basic_Apr2022.rds"))
phenotype_dt = phenotype$data
metadata = phenotype$data

### Create custom phenotypes and includes resilience
phenotype_dt$ad_dementia_status = ifelse(phenotype_dt$cogdx_3gp<3,0,1)
phenotype_dt$cognitive_impairment_status = ifelse(phenotype_dt$cogdx_3gp<2,0,1)
phenotype_dt$gpath_sqrt = sqrt(phenotype_dt$gpath)
phenotype_dt$amyloid_sqrt = sqrt(phenotype_dt$amyloid)
phenotype_dt$tangles_sqrt = sqrt(phenotype_dt$tangles)
phenotype_dt$nft_sqrt = sqrt(phenotype_dt$nft)
phenotype_dt$plaq_d_sqrt = sqrt(phenotype_dt$plaq_d)
phenotype_dt$plaq_n_sqrt = sqrt(phenotype_dt$plaq_n)
phenotype_dt$tdp_43_binary = phenotype_dt$tdp_st2
phenotype_dt$dxpark_status = phenotype_dt$dxpark-1
resilience = read.csv(paste0(resources_dir, "resilience/resilience_capuano_july2022/R719_CR_ROSMAP.csv"))
resilience$projid2 = sprintf("%08d", resilience$projid) # Add leading zeros 
phenotype_dt = phenotype_dt %>% dplyr::left_join(resilience[,-1], by = c("projid"="projid2"))

save(phenotype_dt, file = paste0(bn_results_dir,"phenotypes.RData"))

cat("Saved:", paste0(bn_results_dir,"phenotypes.RData"))
## Saved: /pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/inh_modules/inh_m06/phenotypes.RData

Expression data

cat("Loading SE modules...")
## Loading SE modules...
mod2plot = 6 # Module we want to plot
# pval_cutoff = 0.05  # p-value cutoff for the BN
top_n_genes = 100  # Number of top genes to include in the BN

# Expression data for a single set:
exprData = read.table(paste0(expression_dir, "inh.txt"), header = T, stringsAsFactors = F, check.names = F)
expr_matx = as.data.frame(exprData) # Residuals of the expression
gene_modules = read.table(paste0(net_dir, "geneBycluster.txt"), header = T, stringsAsFactors = F) # clusters from SpeakEasy

# Select the expression values for the module of interest 
to_plot = gene_modules$ensembl[gene_modules$cluster_lv3 == mod2plot]
expr_matx_mod = expr_matx[to_plot, ]

save(expr_matx_mod, file = paste0(bn_results_dir,"dataExp.RData"))
cat("\nSelecting modules based on association results...")
## 
## Selecting modules based on association results...
load(paste0(bn_results_dir,"phenotypes.RData"))
load(paste0(bn_results_dir,"dataExp.RData"))

phenotype_match = phenotype_dt[match(colnames(expr_matx_mod),phenotype_dt$projid),]
expr_matx_mod_t <- as.matrix(t(expr_matx_mod))

#identical(rownames(data4linear_reg),phenotype_match$projid) # TRUE

#Covariates
covariates_ = c( "cogng_demog_slope",
                "tangles_sqrt", 
                "amyloid_sqrt")
adj_by = c("msex","age_death")

data4linear_reg = cbind(expr_matx_mod_t,phenotype_match[,covariates_,drop=F],phenotype_match[,adj_by,drop=F])
scale_data = TRUE
matrix_pvalue = matrix(NA, nrow = length(covariates_), ncol = ncol(expr_matx_mod_t))
for (x in 1:length(covariates_)){
  for (y in 1:ncol(expr_matx_mod_t)){
    x_cov = covariates_[x]
    y_mod = colnames(data4linear_reg)[y]
    if(scale_data){
      form = as.formula(paste(paste0("scale(",y_mod,")"),"~",paste0("scale(",x_cov,")"),"+",paste(adj_by,collapse = "+")))  
    }else{
      form = as.formula(paste(y_mod,"~",x_cov,"+",paste(adj_by,collapse = "+")))
    }
    matrix_pvalue[x,y] <- coef(summary( lm(form, data = data4linear_reg) ))[1,"Pr(>|t|)"]
  } 
}
rownames(matrix_pvalue) = covariates_
colnames(matrix_pvalue) = colnames(expr_matx_mod_t)
matrix_pvalue_adj = matrix(p.adjust(as.vector(as.matrix(matrix_pvalue)), method='bonferroni'),ncol=ncol(matrix_pvalue))

matrix_pvalue_m = reshape2::melt(as.matrix(matrix_pvalue)) %>% 
  dplyr::group_by(Var2) %>% dplyr::summarise(pval = min(value)) %>% arrange(pval)

top_genes = as.character(matrix_pvalue_m$Var2[1:top_n_genes])

# Save the input expression for the BN
write_csv(as.data.frame(expr_matx_mod)[top_genes,], file = BN_run_data, col_names = F)
cat("\nSaved:", BN_run_data)
## 
## Saved: /pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/inh_modules/inh_m06/inh_m06_exp.txt
# CINDERellA has two inputs: exp.txt and output_folder 
setwd(BN_run_dir)
cmd_matlab_call = paste0("matlab -nodisplay -nojvm -nosplash -nodesktop -r")
cmd_matlab_param = paste0("runt=5000; data='",BN_run_data,"'; out_dir='",BN_output_dir,"'; run('",BN_run_dir,"CINDERellA.m')")
cmd_matlab_run = paste0(cmd_matlab_call, " \"",cmd_matlab_param,"\"")
cat(cmd_matlab_run)
system(cmd_matlab_run)
## [1] "/pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/inh_modules/inh_m06/inh_m06_res"

Read results

Edges filtered

# BN_output_dir = paste0(bn_results_dir,data_prefix,"_res")
# BN_run_data = paste0(bn_results_dir,data_prefix,"_exp.txt")

# load(file = paste0(bn_results_dir,data_prefix,"_BN_input.RData")) # phenotype_match,gene_net_metadata,mod_eigengen
# load(file = paste0(bn_results_dir,data_prefix,"_dataExp.RData"))

edgefrq = read_tsv(paste0(BN_output_dir,"/edgefrq.txt"), col_names = c("A","B","freq"), show_col_types = F)

dataExp = expr_matx_mod
edges_df = na.omit(cbind(edgefrq, rownames(dataExp)[edgefrq$A], rownames(dataExp)[edgefrq$B]))
colnames(edges_df) = c("fromNode", "toNode", "weight", "fromAltName", "toAltName") # match names for Cytoscape input

edges_df$fromAltName = gsub("(.*)\\.(.*)","\\1",edges_df$fromAltName)
edges_df$toAltName = gsub("(.*)\\.(.*)","\\1",edges_df$toAltName)

edges_filtered = edges_df[abs(edges_df$weight)>0.45, ] # weight default = 0.33
rownames(edges_filtered) = NULL

# createDT(edges_filtered %>% arrange(-weight))

Nodes filtered

nodes_table = data.frame(nodeName = 1:nrow(dataExp), altName = gsub("(.*)\\.(.*)","\\1",rownames(dataExp))) %>% distinct()
nodes_table$altName = gsub("(.*)\\.(.*)","\\1",nodes_table$altName)
rownames(nodes_table) = NULL
nodes_table = na.omit(unique(nodes_table)) %>% left_join(unique(gene_modules[,c("ensembl","gene_name")]), by = c("altName"="ensembl"))

nodes_filtered = nodes_table[nodes_table$altName %in% unique(c(edges_filtered$fromAltName,edges_filtered$toAltName)), ]
nodes_filtered = nodes_filtered[! duplicated(nodes_filtered$altName), ] 
createDT(nodes_filtered %>% left_join(reshape2::melt(as.matrix(matrix_pvalue)), by = c("altName"="Var2")) %>%
  group_by(altName) %>% mutate(best_pval = min(value), 
                               best_pheno = Var1[which.min(value)],
                               pvalues = paste0(formatC(value, format = "e", digits = 2), collapse = ";"), 
                               phenotypes = paste0(Var1, collapse = ";")) %>% 
  select(nodeName, altName, gene_name, best_pval, best_pheno, pvalues, phenotypes) %>%
  distinct() %>% arrange(best_pval))

BN plot

# Cairo::CairoPDF(file = "/pastel/Github_scripts/SpeakEasy_dlpfc/figures4paper/v3_may2024/bn_inh06.pdf", width = 16, height = 10)
plot_geneBN(edges_filtered = edges_filtered,
          nodes_filtered = nodes_filtered)
## Using `stress` as default layout

# dev.off()

Session

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggforce_0.3.3         igraph_2.0.3          graphlayouts_0.8.0   
##  [4] gtools_3.9.4          RColorBrewer_1.1-3    circlize_0.4.16      
##  [7] ComplexHeatmap_2.15.4 BiocParallel_1.28.3   R.matlab_3.6.2       
## [10] furrr_0.3.1           future_1.33.2         yardstick_1.2.0      
## [13] workflowsets_1.0.1    workflows_1.1.3       tune_1.1.2           
## [16] rsample_1.2.0         recipes_1.0.8         parsnip_1.1.1        
## [19] modeldata_1.2.0       infer_1.0.5           dials_1.2.0          
## [22] scales_1.3.0          broom_1.0.5           tidymodels_1.1.1     
## [25] data.table_1.15.4     ggsci_3.0.3           tidygraph_1.2.0      
## [28] ggnetwork_0.5.13      sna_2.7-2             statnet.common_4.9.0 
## [31] GGally_2.1.2          ggraph_2.0.5          network_1.18.2       
## [34] reshape2_1.4.4        kableExtra_1.3.4      lubridate_1.9.3      
## [37] forcats_1.0.0         stringr_1.5.1         dplyr_1.1.4          
## [40] purrr_1.0.2           readr_2.1.5           tidyr_1.3.1          
## [43] tibble_3.2.1          ggplot2_3.5.0         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] shadowtext_0.1.1    backports_1.4.1     systemfonts_1.0.6  
##   [4] plyr_1.8.9          splines_4.1.2       websocket_1.4.1    
##   [7] crosstalk_1.2.1     listenv_0.9.1       digest_0.6.35      
##  [10] foreach_1.5.2       htmltools_0.5.8.1   viridis_0.6.2      
##  [13] fansi_1.0.6         magrittr_2.0.3      cluster_2.1.2      
##  [16] doParallel_1.0.17   tzdb_0.4.0          globals_0.16.3     
##  [19] gower_1.0.1         matrixStats_1.3.0   vroom_1.6.5        
##  [22] R.utils_2.12.2      svglite_2.1.3       hardhat_1.3.0      
##  [25] timechange_0.3.0    colorspace_2.1-0    rvest_1.0.4        
##  [28] ggrepel_0.9.5       xfun_0.43           crayon_1.5.2       
##  [31] jsonlite_1.8.8      survival_3.2-13     iterators_1.0.14   
##  [34] glue_1.7.0          polyclip_1.10-6     gtable_0.3.4       
##  [37] ipred_0.9-14        webshot_0.5.2       GetoptLong_1.0.5   
##  [40] shape_1.4.6.1       future.apply_1.11.2 BiocGenerics_0.40.0
##  [43] Rcpp_1.0.12         viridisLite_0.4.2   clue_0.3-65        
##  [46] bit_4.0.5           GPfit_1.0-8         DT_0.30            
##  [49] stats4_4.1.2        lava_1.7.2.1        prodlim_2023.08.28 
##  [52] htmlwidgets_1.6.4   httr_1.4.7          pkgconfig_2.0.3    
##  [55] reshape_0.8.9       R.methodsS3_1.8.2   farver_2.1.1       
##  [58] nnet_7.3-16         sass_0.4.9          chromote_0.2.0     
##  [61] utf8_1.2.4          labeling_0.4.3      tidyselect_1.2.1   
##  [64] rlang_1.1.3         DiceDesign_1.9      later_1.3.2        
##  [67] munsell_0.5.1       tools_4.1.2         cachem_1.0.8       
##  [70] cli_3.6.2           generics_0.1.3      evaluate_0.23      
##  [73] fastmap_1.1.1       yaml_2.3.8          bit64_4.0.5        
##  [76] processx_3.8.4      knitr_1.46          R.oo_1.25.0        
##  [79] xml2_1.3.6          compiler_4.1.2      rstudioapi_0.16.0  
##  [82] png_0.1-8           lhs_1.1.6           tweenr_1.0.2       
##  [85] bslib_0.7.0         stringi_1.8.3       highr_0.10         
##  [88] ps_1.7.6            lattice_0.20-45     Matrix_1.6-1.1     
##  [91] vctrs_0.6.5         pillar_1.9.0        lifecycle_1.0.4    
##  [94] GlobalOptions_0.1.2 jquerylib_0.1.4     R6_2.5.1           
##  [97] promises_1.3.0      gridExtra_2.3       IRanges_2.28.0     
## [100] parallelly_1.37.1   codetools_0.2-18    MASS_7.3-54        
## [103] rjson_0.2.21        withr_3.0.0         S4Vectors_0.32.4   
## [106] parallel_4.1.2      hms_1.1.3           rpart_4.1-15       
## [109] timeDate_4022.108   coda_0.19-4.1       class_7.3-19       
## [112] rmarkdown_2.26